#!/usr/bin/python
# -*- coding: iso-8859-15 -*-

###########################
### Author: Sebastian Enger / M.Sc.
### Copyright: Sebastian Enger
### Licence: Commercial / OneTipp
### Version: 1.0.3  - 11-10-2015@21:53 Uhr
### Contact: sebastian.enger@gmail.com
### OneTipp Text Tool in Python
###########################

######## export PYTHON_EGG_CACHE=/tmp

import os
import pprint
import nltk
#import rocksdb                                         # shared library cannot be loaded at the moment
import MySQLdb                                          # apt-get install python-mysqldb
from sphinxit.core.processor import Search              # http://sphinxit.readthedocs.org/en/latest/
from sphinxit.core.helpers import BaseSearchConfig
import random
import codecs
import sys

# Point the Python egg cache at /tmp (same effect as the shell export noted
# near the top of the file).
# NOTE(review): this runs after the third-party imports above; if it is meant
# to influence how those packages are unpacked it would have to be set before
# they are imported -- confirm.
os.environ['PYTHON_EGG_CACHE'] = '/tmp'


from nltk.tokenize import sent_tokenize
# One-time NLTK data installation (run from a shell, not from this script):
###python -m nltk.downloader -d /usr/share/nltk_data all
####python -m nltk.downloader all
###########nltk.download()

# Python 2 only: reload(sys) makes sys.setdefaultencoding callable again so
# that implicit str <-> unicode conversions in this script use latin-1.
reload(sys)
sys.setdefaultencoding('latin-1')

class SphinxitConfig(BaseSearchConfig):
    """Sphinxit configuration handed to every ``Search`` built in this script."""

    # Address of the searchd instance that the queries are sent to.
    SEARCHD_CONNECTION = dict(host='127.0.0.1', port=9977)
    POOL_SIZE = 5
    # SQL_ENGINE = 'oursql'

    # Diagnostic switches, all enabled.
    DEBUG = True
    WITH_META = True
    WITH_STATUS = True

# Pretty-printer kept around for the (currently commented-out) debug dumps.
pp = pprint.PrettyPrinter(indent=4)

# References:
#   http://pyrocksdb.readthedocs.org/en/v0.4/tutorial/index.html
#   https://github.com/sphinxsearch/sphinx/blob/master/api/sphinxapi.py
#   http://www.tutorialspoint.com/python/python_database_access.htm

# Both connections are opened through MySQLdb with identical credentials;
# only the port differs.
# NOTE(review): the credentials are hard-coded in the source.
_db_login = dict(
    host='127.0.0.1',
    user='root',
    passwd='###########99',
    db='onetipp',
)

sphinx = MySQLdb.connect(port=9977, **_db_login)    # SphinxQL
cursorSphinx = sphinx.cursor()

mysql = MySQLdb.connect(port=3306, **_db_login)     # MySQL
cursorMysql = mysql.cursor()


# Command line: <script> <inputfile> <outputfile>
# Fail with a usage message instead of a bare IndexError when an argument
# is missing.
if len(sys.argv) < 3:
    sys.stderr.write("usage: %s <inputfile> <outputfile>\n" % sys.argv[0])
    sys.exit(2)

inputfile = sys.argv[1]
outputfile = sys.argv[2]

# http://www.tutorialspoint.com/python/python_command_line_arguments.htm
# Read the whole input file and decode it from latin-1.  The decoded value
# has to be assigned: str.decode() returns a new object, so calling it as a
# bare statement leaves `text` unchanged.  The `with` block also makes sure
# the file handle is closed.
with open(inputfile, 'r') as infile:
    text = infile.read().decode('latin-1')

#sent_tokenize_list = sent_tokenize(text)
tokens = nltk.word_tokenize(text)
#pp.pprint(tokens)


def _sphinx_hits(index, field, term):
    """Return the items Sphinx reports for *term* in *index*.

    Builds a single-match ``proximity_bm25`` query that weights *field* with
    100 and returns ``response['result']['items']``.  An empty list is
    returned when the response has no ``'result'`` or no ``'items'`` key.
    """
    query = Search(indexes=[index], config=SphinxitConfig)
    query = query.match(term).options(
        ranker='proximity_bm25',
        max_matches=1,
        field_weights={field: 100},
    )
    response = query.ask()
    if 'result' not in response or 'items' not in response['result']:
        return []
    return response['result']['items']


def _synonyms_for(syn_id):
    """Return the ``;``-separated synonyms stored for *syn_id* as a list.

    Returns None when ``synonym_unique`` has no row for that uid or the
    column is NULL, so the caller can leave the token untouched instead of
    crashing on a missing row.
    """
    # Let the driver bind the value instead of interpolating it into the SQL.
    cursorMysql.execute(
        "SELECT synonyms FROM (synonym_unique) WHERE uid = %s", (syn_id,))
    row = cursorMysql.fetchone()
    if row is None or row[0] is None:
        return None
    return row[0].split(";")


# Walk over all tokens and rewrite title-cased ones in place.
# Original plan, as written by the author:
#   1. check for an entry in the name DB -> if found: write-protect the token
#   2. check Synonym_Unique -> if found: pick a synonym (rand[0-4])
for position, word in enumerate(tokens):
    if not word.istitle():
        continue

    for name_hit in _sphinx_hits('onetipp_name', 'name', word):
        # NOTE(review): the original comment here read "a name was found ->
        # do not swap in a synonym", yet the code replaces the token exactly
        # when a name hit with a positive id exists.  Behavior is kept as
        # implemented; confirm which of the two is intended.
        if not (name_hit['id'] > 0):
            continue

        for syn_hit in _sphinx_hits('onetipp_syn', 'synonyms', word):
            synonyms = _synonyms_for(syn_hit['id'])
            if synonyms is None:
                continue
            # Always takes the first synonym; a random.choice(...) variant
            # was tried earlier and disabled.
            tokens[position] = '<b style="color:#87CEFA;">' + synonyms[0] + "</b>"

# NOTE: an earlier, disabled variant of this loop (previously parked in the
# loop body as a bare string literal) queried 'onetipp_syn' for every
# title-cased token, without the name-DB check, and inserted
# random.choice(...) of the synonyms.

# Write the result file: all tokens joined by single spaces.
outputtext = ' '.join(tokens)
# The encoding is spelled out so the output does not depend on the
# interpreter-wide default set via sys.setdefaultencoding('latin-1').
# The `with` block closes the file; no explicit close() is needed.
with codecs.open(outputfile, 'w', encoding='latin-1') as f:
    f.write(outputtext)

# Close both database connections (the SphinxQL one was previously left open).
mysql.close()
sphinx.close()

#print outputtext

# sys.exit is used instead of the interactive `exit` helper, which is
# installed by the site module and is not guaranteed to exist.
sys.exit(0)